package com.yueke.gemini.spide;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import us.codecraft.webmagic.selector.Html;

import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;

/**
 * Basic crawler implementation.
 *
 * <p>Downloads the page at {@link #PAGE_URL}, decodes the body as {@link #PAGE_CHARSET},
 * selects every element matching the CSS selector {@code .touzhu_1} and prints the text
 * of the first {@code div .liansai a} anchor found inside each of them.
 */
public class HttpGetDemo {

    /** Page that is downloaded and parsed. */
    private static final String PAGE_URL = "http://www.okooo.com/jingcai/";

    /** Browser-like User-Agent sent with the request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36";

    /** Charset used to decode the response body. */
    private static final String PAGE_CHARSET = "GB2312";

    /**
     * Fetches the page and prints the anchor text of each matched block.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String page = fetchPage(PAGE_URL);

        List<String> blocks = Html.create(page).$(".touzhu_1").all();
        for (String block : blocks) {
            String anchorHtml = Html.create(block).$("div .liansai a").get();
            String text = extractAnchorText(anchorHtml);
            // Skip blocks without a usable anchor instead of crashing the whole run.
            if (text != null) {
                System.out.println(text);
            }
        }
    }

    /**
     * Performs an HTTP GET on {@code url} and returns the body decoded as {@link #PAGE_CHARSET}.
     * The value of the first {@code Content-Type} response header, if any, is printed to
     * standard output.
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and whatever was read up to
     * that point (possibly the empty string) is returned.
     *
     * @param url address to download
     * @return the decoded response body; never {@code null}
     */
    private static String fetchPage(String url) {
        StringBuilder body = new StringBuilder();
        HttpGet get = new HttpGet(url);
        get.setHeader("User-Agent", USER_AGENT);

        // try-with-resources closes the response and the client even when a call below throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(get)) {
            Header[] headers = response.getHeaders("Content-Type");
            if (headers.length > 0) {
                System.out.println(headers[0].getValue());
            }

            HttpEntity entity = response.getEntity();
            if (entity != null) {
                try (InputStreamReader isr = new InputStreamReader(entity.getContent(), PAGE_CHARSET)) {
                    char[] cbuf = new char[1024];
                    int read;
                    while ((read = isr.read(cbuf)) != -1) {
                        // Append only the characters actually read; the rest of the buffer
                        // still holds data from a previous iteration.
                        body.append(cbuf, 0, read);
                    }
                }
                EntityUtils.consume(entity);
            }
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return body.toString();
    }

    /**
     * Extracts the text that follows the opening tag of an anchor's outer HTML: the segment
     * between the first and second {@code '>'}, cut at the first {@code "</"}.
     *
     * @param anchorHtml outer HTML of the anchor; may be {@code null}
     * @return the extracted text, or {@code null} when {@code anchorHtml} is {@code null} or
     *         contains nothing after its first {@code '>'}
     */
    private static String extractAnchorText(String anchorHtml) {
        if (anchorHtml == null) {
            return null;
        }
        String[] afterOpenTag = anchorHtml.split(">");
        if (afterOpenTag.length < 2) {
            return null;
        }
        String[] beforeCloseTag = afterOpenTag[1].split("</");
        // split() drops trailing empty strings, so the array can be empty.
        return beforeCloseTag.length == 0 ? "" : beforeCloseTag[0];
    }

}

